/*
Purpose: create parental education variables from Census data
*/

// Load the cleaned NAEP cohort extract.
use "data/clean/NAEP cohorts", clear

// Sample restriction: U.S. birthplace only (bpl<=56), and both the birthplace
// and age quality flags must be 0 (i.e. the values were not allocated).
keep if bpl <= 56 & qbpl == 0 & qage == 0

// Set values to missing when they carry a missing/N-A code OR were allocated
// (a nonzero q* quality flag), so only directly reported values are used below.

// Household-level income and housing variables.
replace ftotinc = . if ftotinc==9999999|qftotinc!=0
replace hhincome = . if hhincome==9999999|qhhincome!=0
// rentgrs is blanked for zero rent and for ownershp==1 (presumably owner-occupied
// units -- confirm against codebook); valueh is blanked for ownershp==2
// (presumably renters -- confirm against codebook).
replace rentgrs = . if qrent!=0|qownersh!=0|rent==0|ownershp==1|rentgrs==0
replace valueh = . if qvalueh!=0|qownersh!=0|valueh==9999999|ownershp==2
replace poverty = . if poverty==0

// Parent-level variables: the same rules apply to mother (_mom) and father (_pop).
foreach par in mom pop {
	replace educ_`par' = . if qeduc_`par'!=0
	// The college flags created later in this file are built from the detailed
	// code educd_*, not educ_*, so educd_* needs the same allocation screen.
	// Code 999 (the EDUCD missing code) is also blanked; otherwise it would
	// satisfy educd>=81 and be counted as a college graduate.
	replace educd_`par' = . if educd_`par'==999|qeduc_`par'!=0
	replace empstat_`par' = . if empstat_`par'==0|qempstat_`par'!=0
	replace occ1950_`par' = . if occ1950_`par'==999|qocc_`par'!=0
}

//build 0/1 parental education indicators from the detailed education code
//(1 when educd>=81; presumably associate's degree or higher -- confirm against codebook)

foreach par in mom pop {
	// missing detailed education stays missing rather than being coded 0
	gen gcol_`par'_2yr = cond(educd_`par'==., ., educd_`par'>=81)
}

// 1 if either parent's flag is 1; missing only when both parents' flags are missing
gen either_par_gcol_2yr = cond(gcol_mom_2yr==. & gcol_pop_2yr==., ., gcol_mom_2yr==1 | gcol_pop_2yr==1)

// Declare the survey design: PSU = cluster, person weights, stratified by strata.
svyset cluster [pweight=perwt], strata(strata)

// Label every birth-year x survey-year cell with a "yob_year" string key.
ren birthyr yob
gen yob_year = string(yob) + "_" + string(year)
levelsof yob_year, local(yob_year_groups)

// Holders for the cell-level survey-weighted mean and standard deviation.
gen mean_either_par_gcol_2yr = .
gen sd_either_par_gcol_2yr = .

foreach cell of local yob_year_groups {
	// Recover the numeric cohort and survey year from the key
	// (characters 1-4 and 6-9, i.e. both parts are read as 4-digit years).
	local cohort = real(substr("`cell'", 1, 4))
	di `cohort'
	local survey = real(substr("`cell'", 6, 4))
	di `survey'

	// Condition identifying the observations that belong to this cell.
	local in_cell "yob==`cohort'&year==`survey'"

	// Subpopulation estimate keeps the full design in play for this cell.
	svy, subpop(if `in_cell'): mean either_par_gcol_2yr
	estat sd

	// estat sd leaves the mean and sd in r() as matrices; take the single element.
	replace mean_either_par_gcol_2yr = r(mean)[1,1] if `in_cell'
	replace sd_either_par_gcol_2yr = r(sd)[1,1] if `in_cell'
}

// Standardize the indicator within its birth-year x survey-year cell (z-score).
gen norm_either_par_gcol_2yr = (either_par_gcol_2yr - mean_either_par_gcol_2yr) / sd_either_par_gcol_2yr

// Weighted mean of the standardized indicator for each
// birth year x survey year x birthplace cell.
collapse (mean) norm_either_par_gcol_2yr [pw=perwt], by(yob year bpl)

ren bpl fips_birth

// Within each birth year x birthplace pair keep only the latest available
// survey year (so older cohorts, closer to 13yo), then drop the year itself.
bys yob fips_birth (year): keep if _n == _N
drop year

save "data/clean/state_year_ses_index", replace
